# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
from urllib.parse import unquote

import pymysql
import scrapy
from chinaNews.items import XinhuanetItem
from scrapy.pipelines.images import ImagesPipeline
import logging

# Module-level error logger shared by the pipelines below.
# Everything at ERROR and above is appended to record.log.
logger = logging.getLogger('error_logger')
logger.setLevel(logging.ERROR)

# File handler with a timestamped format, attached once at import time.
file_handler = logging.FileHandler('record.log')
formatter = logging.Formatter('[%(asctime)s] [%(levelname)s] %(message)s')
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)


# useful for handling different item types with a single interface


class ChinanewsPipeline:
    """No-op pipeline: forwards every item to the next stage unchanged."""

    def process_item(self, item, spider):
        # Nothing to persist here; just keep the item flowing.
        return item


def urllib_download(url):
    """Best-effort download of *url* into the local ``images/`` directory.

    The target filename is the last path segment of the percent-decoded URL.
    Failures are never raised; they are appended to ``imageDownloadError.txt``
    so batch callers can keep going.
    """
    import os
    from urllib.request import urlretrieve
    url = unquote(url)
    try:
        # Ensure the target directory exists; previously a missing images/
        # directory made every single download end up in the error file.
        os.makedirs("images", exist_ok=True)
        file_name = "images/" + url.split('/')[-1]
        urlretrieve(url, file_name)
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
        # are no longer swallowed.
        with open("./imageDownloadError.txt", "a", encoding='utf-8') as err_log:
            err_log.write("\n图片下载失败 %s" % url)


class XinhuanetPipline:
    """Persist scraped articles into the MySQL table ``hg_article_detail``."""

    def __init__(self):
        # NOTE(review): connection parameters are hard-coded — consider moving
        # them to Scrapy settings. Charset left as-is to avoid behavior change.
        self.conn = pymysql.connect(host="127.0.0.1", port=3306, user="root", password="123456", database="task_aqielife_cn",
                                    charset='utf8')
        # Single cursor reused for every insert.
        self.cursor = self.conn.cursor()

    def close_spider(self, spider):
        """Release database resources when the spider shuts down."""
        print('----------关闭数据库资源-----------')
        # Close cursor first, then the connection.
        self.cursor.close()
        self.conn.close()

    def _insert(self, sql, params):
        # One parameterized INSERT followed by an immediate commit.
        self.cursor.execute(sql, params)
        self.conn.commit()

    def process_item(self, item, spider):
        """Insert one article row; items without a title are only reported.

        Bug fix: always return the item. The original returned None on every
        path, which made Scrapy hand None to any pipeline ordered after this
        one (process_item must return the item or raise DropItem).
        """
        if 'image_path' in item:
            insert_sql = """
            insert into hg_article_detail(title,content, url,images) value(%s,%s,%s,%s)
            """
            # Flatten the list of stored image paths to a comma-separated string.
            item['image_path'] = ','.join(item['image_path'])
            self._insert(insert_sql, (item['title'], item['content'], item['url'], item['image_path']))
        elif item.get('title') is not None:
            print("item", item['title'])
            insert_sql = """
                        insert into hg_article_detail(title,content, url) value(%s,%s,%s)
                        """
            self._insert(insert_sql, (item['title'], item['content'], item['url']))
        else:
            # Missing title: nothing to store.
            print("标题不存在")
        return item


class ImagePipeline(ImagesPipeline):
    """Downloads article images and records their stored paths on the item.

    Only acts on XinhuanetItem instances that carry an 'images' field; every
    other item passes through untouched.
    """

    def __init__(self, store_uri, download_func=None, settings=None):
        super().__init__(store_uri, download_func, settings)
        self.default_headers = None

    def get_media_requests(self, item, info):
        """Yield one download request per image URL of a matching item."""
        if not (item and isinstance(item, XinhuanetItem) and 'images' in item):
            return
        for image_url in item['images']:
            yield scrapy.Request(url=image_url)

    def file_path(self, request, response=None, info=None, *, item=None):
        """Store each downloaded image under its original URL basename."""
        if item and isinstance(item, XinhuanetItem) and 'images' in item:
            return request.url.rsplit('/', 1)[-1]

    def item_completed(self, results, item, info):
        """Attach the list of successfully stored paths as item['image_path']."""
        if item and isinstance(item, XinhuanetItem) and 'images' in item:
            downloaded = [res['path'] for ok, res in results if ok]
            if downloaded:
                item['image_path'] = downloaded
        return item
